Workflow

Click the nodes to obtain details about each step.

Concordance

Raw Data

Distance

Data

Plot

Truth Evaluation - All Variants

Precision

Raw data

Recall

Truth Evaluation - SNPs

Precision

Raw data

Recall

Truth Evaluation - indels

Raw data

Statistics

If the workflow was executed on a cluster or in the cloud, runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Input and output locations. Every path here is relative.
# NOTE(review): the directory these resolve against is not visible in this
# file - confirm against the workflow that loads it.
samplesheet: '../../docs/samplesheet.csv'
build_prg_dir: '../../data/H37Rv_PRG'
baseline_variant_dir: '../baseline_variants'
qc_dir: '../../data/QC'
compass_vcf_dir: '../baseline_variants/illumina/gvcfs'
asm_dir: '../assembly/results'
data_dir: '../../data/QC/subsampled'
# NOTE(review): presumably the reference genome length in base pairs - confirm.
genome_size: 4411532
# H37Rv reference files: a FASTA sequence (genome) and a BED file (mask).
h37rv:
  genome: '../../data/H37Rv_PRG/resources/h37rv.fa'
  mask: '../../data/H37Rv_PRG/resources/compass-mask.bed'
# Container image URIs, one per tool. Each is pinned to an explicit tag so the
# image does not change between runs.
containers:
  conda: 'docker://continuumio/miniconda3:4.7.12'
  pandora: 'docker://quay.io/mbhall88/pandora:824ad3b'
  bcftools: 'docker://quay.io/biocontainers/bcftools:1.10.2--hd2cd319_0'
  varifier: 'docker://quay.io/mbhall88/varifier:84e3291'
  snp-dists: 'docker://quay.io/biocontainers/snp-dists:0.7.0--hed695b0_0'
  make_prg: 'docker://quay.io/iqballab/make_prg:0.1.1'
# Environment definition files, keyed by the step that uses them.
# Entries beginning with ../baseline_variants/ point at files kept in that
# sibling directory; the rest live under the local envs/ directory.
# NOTE(review): presumably conda environment files - confirm.
envs:
  update_msas: 'envs/update_msas.yaml'
  fd: 'envs/fd.yaml'
  normalise_pos: 'envs/normalise_pos.yaml'
  plot_truth_eval: 'envs/plot_truth_eval.yaml'
  filter: 'envs/filter.yaml'
  varifier: '../baseline_variants/envs/varifier.yaml'
  consensus: '../baseline_variants/envs/consensus.yaml'
  concordance: '../baseline_variants/envs/concordance.yaml'
  plot_distance_matrix: '../baseline_variants/envs/plot_distance_matrix.yaml'
  extract_snps: 'envs/extract_snps.yaml'
  deduplicate_fasta: 'envs/deduplicate_fasta.yaml'
  multi_dist: 'envs/multisample_distance.yaml'
  create_sample_index: 'envs/create_sample_index.yaml'
  make_genotype_matrix: 'envs/genotype_matrix.yaml'
# Script files, keyed by the step that runs them.
# Most live under the local scripts/ directory; a few are taken from
# ../baseline_variants/scripts/ and one from the H37Rv_PRG data directory.
scripts:
  update_msas: 'scripts/update_msas.py'
  make_prg: '../../data/H37Rv_PRG/scripts/build_local_prgs.sh'
  extract_snps: 'scripts/extract_snps.py'
  aggregate_denovo_paths: 'scripts/aggregate_denovo_paths.py'
  make_genotype_matrix: 'scripts/genotype_matrix.py'
  multi_dist: 'scripts/multisample_distance.py'
  normalise_pos: 'scripts/normalise_pos.py'
  plot_truth_eval: 'scripts/plot_truth_eval.py'
  deduplicate_fasta: 'scripts/deduplicate_fasta.sh'
  filter: 'scripts/apply_filters.py'
  consensus: '../baseline_variants/scripts/consensus.py'
  concordance: '../baseline_variants/scripts/concordance.py'
  plot_distance_matrix: '../baseline_variants/scripts/plot_distance_matrix.py'
  create_sample_index: 'scripts/create_sample_index.py'

# Thresholds applied when filtering pandora calls.
# NOTE(review): max_covg and max_gaps are both 0. Whether 0 means "disabled"
# or a literal limit is decided by the consuming filter script, which is not
# visible here - confirm before changing either value.
filters:
  min_covg: 3
  max_covg: 0
  min_gt_conf: 15
  min_strand_bias: 1
  max_gaps: 0
  max_indel: 20

# Assembly selected for each sample, keyed by sample name.
# Every sample listed here currently maps to 'flye'.
# NOTE(review): presumably the name of the assembler whose output is used -
# confirm against the assembly workflow.
assemblies:
  mada_101: 'flye'
  mada_102: 'flye'
  mada_104: 'flye'
  mada_130: 'flye'
  mada_116: 'flye'
  mada_1-44: 'flye'
  mada_132: 'flye'
  mada_125: 'flye'

# pandora parameters.
# Note: pandora_e and pandora_E differ only in letter case. YAML keys are
# case-sensitive, so these are two distinct settings.

# kmer size for pandora
pandora_k: 15
# window size for pandora
pandora_w: 14
# pandora error rate
pandora_e: 0.11
# pandora genotype error rate
pandora_E: 0.01

# Names of the PRGs handled by the workflow.
prg_names: [sparse, dense]

# make_prg parameters.
# nesting level for make_prg
nesting_level: 5
# match length for make_prg
match_length: 7
# Settings grouped under "discover".
# NOTE(review): presumably options for pandora's variant discovery step -
# confirm against the rule that reads this mapping.
discover:
  merge_dist: 15
  max_candidate_len: 30
  min_dbg_abundance: 2
  kmer_size: 15
  clean: false
  max_insertion: 15
  max_nb_paths: 10

Loading...